Data Collection¶

In [ ]:
import pandas as pd
In [ ]:
train_df = pd.read_csv('train.csv')
# test_df = pd.read_csv('test.csv')
In [ ]:
train_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_screen   2000 non-null   int64  
 19  wifi           2000 non-null   int64  
 20  price_range    2000 non-null   int64  
dtypes: float64(2), int64(19)
memory usage: 328.3 KB
In [ ]:
train_df.describe()
Out[ ]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
count 2000.000000 2000.0000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 ... 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000
mean 1238.518500 0.4950 1.522250 0.509500 4.309500 0.521500 32.046500 0.501750 140.249000 4.520500 ... 645.108000 1251.515500 2124.213000 12.306500 5.767000 11.011000 0.761500 0.503000 0.507000 1.500000
std 439.418206 0.5001 0.816004 0.500035 4.341444 0.499662 18.145715 0.288416 35.399655 2.287837 ... 443.780811 432.199447 1084.732044 4.213245 4.356398 5.463955 0.426273 0.500116 0.500076 1.118314
min 501.000000 0.0000 0.500000 0.000000 0.000000 0.000000 2.000000 0.100000 80.000000 1.000000 ... 0.000000 500.000000 256.000000 5.000000 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000
25% 851.750000 0.0000 0.700000 0.000000 1.000000 0.000000 16.000000 0.200000 109.000000 3.000000 ... 282.750000 874.750000 1207.500000 9.000000 2.000000 6.000000 1.000000 0.000000 0.000000 0.750000
50% 1226.000000 0.0000 1.500000 1.000000 3.000000 1.000000 32.000000 0.500000 141.000000 4.000000 ... 564.000000 1247.000000 2146.500000 12.000000 5.000000 11.000000 1.000000 1.000000 1.000000 1.500000
75% 1615.250000 1.0000 2.200000 1.000000 7.000000 1.000000 48.000000 0.800000 170.000000 7.000000 ... 947.250000 1633.000000 3064.500000 16.000000 9.000000 16.000000 1.000000 1.000000 1.000000 2.250000
max 1998.000000 1.0000 3.000000 1.000000 19.000000 1.000000 64.000000 1.000000 200.000000 8.000000 ... 1960.000000 1998.000000 3998.000000 19.000000 18.000000 20.000000 1.000000 1.000000 1.000000 3.000000

8 rows × 21 columns

In [ ]:
train_df.hist(figsize=(20,20),color="blue")
Out[ ]:
array([[<Axes: title={'center': 'battery_power'}>,
        <Axes: title={'center': 'blue'}>,
        <Axes: title={'center': 'clock_speed'}>,
        <Axes: title={'center': 'dual_sim'}>,
        <Axes: title={'center': 'fc'}>],
       [<Axes: title={'center': 'four_g'}>,
        <Axes: title={'center': 'int_memory'}>,
        <Axes: title={'center': 'm_dep'}>,
        <Axes: title={'center': 'mobile_wt'}>,
        <Axes: title={'center': 'n_cores'}>],
       [<Axes: title={'center': 'pc'}>,
        <Axes: title={'center': 'px_height'}>,
        <Axes: title={'center': 'px_width'}>,
        <Axes: title={'center': 'ram'}>,
        <Axes: title={'center': 'sc_h'}>],
       [<Axes: title={'center': 'sc_w'}>,
        <Axes: title={'center': 'talk_time'}>,
        <Axes: title={'center': 'three_g'}>,
        <Axes: title={'center': 'touch_screen'}>,
        <Axes: title={'center': 'wifi'}>],
       [<Axes: title={'center': 'price_range'}>, <Axes: >, <Axes: >,
        <Axes: >, <Axes: >]], dtype=object)
No description has been provided for this image

Data Preprocessing¶

Data Cleaning¶

In [ ]:
train_df.isnull().sum()
# test_df.isnull().sum()
Out[ ]:
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

Feature Engineering¶

In [ ]:
# Screen Area
train_df['sc_a'] = train_df['sc_h']*train_df['sc_w']
# test_df['sc_a'] = test_df['sc_h']*test_df['sc_w']

Data Transformation¶

In [ ]:
# Checking each feature whether it is categorical or numerical feature
for col in train_df.columns:
    print(train_df[col].value_counts())
battery_power
1872    6
618     6
1589    6
1715    5
1807    5
       ..
660     1
1452    1
1005    1
1372    1
858     1
Name: count, Length: 1094, dtype: int64
blue
0    1010
1     990
Name: count, dtype: int64
clock_speed
0.5    413
2.8     85
2.3     78
2.1     76
1.6     76
2.5     74
0.6     74
1.4     70
1.3     68
1.5     67
2.0     67
1.9     65
0.7     64
2.9     62
1.8     62
1.0     61
1.7     60
2.2     59
0.9     58
2.4     58
0.8     58
1.2     56
2.6     55
2.7     55
1.1     51
3.0     28
Name: count, dtype: int64
dual_sim
1    1019
0     981
Name: count, dtype: int64
fc
0     474
1     245
2     189
3     170
5     139
4     133
6     112
7     100
9      78
8      77
10     62
11     51
12     45
13     40
16     24
15     23
14     20
18     11
17      6
19      1
Name: count, dtype: int64
four_g
1    1043
0     957
Name: count, dtype: int64
int_memory
27    47
16    45
14    45
57    42
2     42
      ..
22    24
38    23
62    21
4     20
59    18
Name: count, Length: 63, dtype: int64
m_dep
0.1    320
0.2    213
0.8    208
0.5    205
0.7    200
0.3    199
0.9    195
0.6    186
0.4    168
1.0    106
Name: count, dtype: int64
mobile_wt
182    28
101    27
185    27
146    26
199    26
       ..
116    10
140     9
120     9
149     9
96      9
Name: count, Length: 121, dtype: int64
n_cores
4    274
7    259
8    256
2    247
3    246
5    246
1    242
6    230
Name: count, dtype: int64
pc
10    122
7     119
9     112
20    110
1     104
14    104
0     101
2      99
17     99
6      95
4      95
3      93
15     92
12     90
8      89
16     88
13     85
19     83
18     82
11     79
5      59
Name: count, dtype: int64
px_height
347    7
179    6
371    6
275    6
674    5
      ..
87     1
648    1
341    1
993    1
483    1
Name: count, Length: 1137, dtype: int64
px_width
874     7
1247    7
1383    6
1463    6
1469    6
       ..
1125    1
1367    1
1569    1
1481    1
1632    1
Name: count, Length: 1109, dtype: int64
ram
1464    4
3142    4
2610    4
2227    4
1229    4
       ..
2312    1
2167    1
3508    1
297     1
3919    1
Name: count, Length: 1562, dtype: int64
sc_h
17    193
12    157
7     151
16    143
14    143
15    135
13    131
11    126
10    125
9     124
19    124
18    120
8     117
6     114
5      97
Name: count, dtype: int64
sc_w
1     210
3     199
4     182
0     180
5     161
2     156
7     132
6     130
8     125
10    107
9      97
11     84
12     68
13     49
14     33
15     31
16     29
17     19
18      8
Name: count, dtype: int64
talk_time
7     124
4     123
16    116
15    115
19    113
6     111
10    105
8     104
11    103
20    102
14    101
13    100
18    100
9     100
2      99
12     99
17     98
3      94
5      93
Name: count, dtype: int64
three_g
1    1523
0     477
Name: count, dtype: int64
touch_screen
1    1006
0     994
Name: count, dtype: int64
wifi
1    1014
0     986
Name: count, dtype: int64
price_range
1    500
2    500
3    500
0    500
Name: count, dtype: int64
sc_a
0      180
24      44
30      39
10      37
32      34
      ... 
64       4
162      4
26       3
95       2
133      2
Name: count, Length: 127, dtype: int64

From above data inspection, we can see that types of data are Categorical and Numerical.

Categorical (7+1) Numerical(14)
blue battery_power
dual_sim clock_speed
four_g fc
three_g int_memory
touch_screen m_dep, mobile_wt
wifi pc
n_cores px_height,px_width
price_range ram
sc_h, sc_w, sc_a
talk_time

Handling numerical data & categorical data¶

In [ ]:
categorical_features = ['price_range','blue','dual_sim','four_g','three_g','touch_screen','wifi','n_cores']
numerical_features = ['battery_power','clock_speed','fc','int_memory','m_dep','mobile_wt','pc','px_height','px_width','ram','sc_h','sc_w','sc_a','talk_time']
In [ ]:
for col in categorical_features:
   train_df[col] = train_df[col].astype('category')

Feature scaling¶

In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
In [ ]:
fig, axs = plt.subplots(1, len(numerical_features), figsize=(30, 3), layout='constrained')
fig.suptitle('Checking distribution', fontsize=14, fontweight='bold')
for i, col in enumerate(numerical_features): 
    axs[i].hist(train_df[col])
    axs[i].set_title(col)
    axs[i].tick_params(axis='x', labelrotation=45)
    axs[i].tick_params(axis='y', labelsize=6)
plt.show()
No description has been provided for this image

We tend to use Normalized DataFrames because it is not normally distributed.


EDA¶

Univariate analysis¶

In [ ]:
fig, axs = plt.subplots(1, len(numerical_features), figsize=(30, 3), layout='constrained')
fig.suptitle('Distribution', fontsize=14, fontweight='bold')
for i, col in enumerate(numerical_features): 
    axs[i].hist(train_df[col])
    axs[i].set_title(col)
    axs[i].tick_params(axis='x', labelrotation=45)
    axs[i].tick_params(axis='y', labelsize=6)
plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(1,len(categorical_features), figsize=(30, 10), layout='constrained')
fig.suptitle('Distribution', fontsize=14, fontweight='bold')
for i, col in enumerate(categorical_features):
    axs[i].bar(train_df[col].value_counts().index, train_df[col].value_counts())
    axs[i].set_title(col)
    axs[i].tick_params(axis='x', labelrotation=45)
    axs[i].tick_params(axis='y', labelsize=8)
    
plt.show()
No description has been provided for this image

Bivariate Analysis¶

In [ ]:
import seaborn as sns
In [ ]:
fig, axs = plt.subplots(1, len(numerical_features), figsize=(30, 10), constrained_layout=True)
fig.suptitle('Relationship between numerical features and the target variable', fontsize=14, fontweight='bold')

# Initialize an empty handles and labels list to collect legend information
handles, labels = [], []

for i, col in enumerate(numerical_features):
    ax = sns.boxplot(x='price_range', y=col, hue='price_range', data=train_df, ax=axs[i], palette='pastel')
    axs[i].set_title(col)
    axs[i].tick_params(axis='x', labelrotation=45)
    axs[i].tick_params(axis='y', labelsize=8)
    
    # Remove legend from individual subplot
    ax.get_legend().remove()
    
    # Get handles and labels for the legend
    if i == 0:
        h, l = ax.get_legend_handles_labels()
        handles.extend(h)
        labels.extend(l)

# Create a single legend outside the subplots
fig.legend(handles, labels, loc='upper left', ncol=len(train_df['price_range'].unique()), frameon=False)
plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(1, len(categorical_features), figsize=(30, 5), layout='constrained')
fig.suptitle('relationship between categorical features and the target variable', fontsize=14, fontweight='bold')
for i, col in enumerate(categorical_features):
    sns.countplot(x=col, hue='price_range', data=train_df, ax=axs[i])
    axs[i].set_title(f'{col} vs price_range')
    axs[i].tick_params(axis='x')
    axs[i].legend(loc='lower right', title = 'price range')
plt.show()
No description has been provided for this image

Correlation Analysis¶

In [ ]:
numerical_features_target = numerical_features.copy()
numerical_features_target.append('price_range')
In [ ]:
# train_data_numerical = pd.concat([x_train_norm[numerical_features], y_train], axis=1)
# correlation_matrix = train_data_numerical.corr()
correlation_matrix = train_df[numerical_features_target].corr()

correlation_with_target = correlation_matrix['price_range'].drop('price_range')
plt.figure(figsize=(20, 16))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix between Numerical Features and Target Variable',fontweight='bold')
plt.show()

print("Correlation with target variable (price_range):")
print(correlation_with_target)
No description has been provided for this image
Correlation with target variable (price_range):
battery_power    0.200723
clock_speed     -0.006606
fc               0.021998
int_memory       0.044435
m_dep            0.000853
mobile_wt       -0.030302
pc               0.033599
px_height        0.148858
px_width         0.165818
ram              0.917046
sc_h             0.022986
sc_w             0.038711
sc_a             0.041248
talk_time        0.021859
Name: price_range, dtype: float64

More Visualization¶

In [ ]:
# Pairplot for numerical features
sns.pairplot(data=train_df, vars=numerical_features, hue='price_range')
plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(1, len(categorical_features), figsize=(30, 5), layout='constrained')
fig.suptitle('Categorical features portion', fontsize=14, fontweight='bold')
for i, col in enumerate(categorical_features):
    sorted_data = train_df[col].value_counts().sort_index()
    axs[i].pie(sorted_data, labels=sorted_data.index, autopct="%0.2f")
    axs[i].set_title(f'{col}')
    axs[i].legend(loc='lower right')
plt.show()
No description has been provided for this image
In [ ]:
# Histograms of individual numerical features
plt.figure(figsize=(20, 12))
for i, col in enumerate(numerical_features, start=1):
    plt.subplot(4, 4, i)
    sns.histplot(data=train_df , x=col, kde=True, hue='price_range')
    plt.title(col)
plt.tight_layout()
plt.show()
No description has been provided for this image

Model Building¶

Split data into train and test sets¶

In [ ]:
from sklearn.model_selection import train_test_split
# Because test.csv doesn't contain labels or target values, we need to split the train.csv instead.
X=train_df.drop(columns=['price_range'],axis = 1)
y=train_df['price_range']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

Normalization¶

From above inspection, the data doesn't have normal distribution. Therefore, we need to normalize the data.

In [ ]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
In [ ]:
# Normalization (0-1)
normalizer = MinMaxScaler()
x_train_norm = x_train.copy()
x_test_norm = x_test.copy()

for col in numerical_features:
    # Reshape the data to a 2D array with a single column
    x_train_col = x_train[col].values.reshape(-1, 1)
    x_test_col = x_test[col].values.reshape(-1, 1)
    
    # Fit and transform the training data
    x_train_norm_col = normalizer.fit_transform(x_train_col)
    # Transform the testing data (using the same scaler fitted on training data)
    x_test_norm_col = normalizer.transform(x_test_col)
    
    # Assign the transformed data back to the original DataFrame
    x_train_norm[col] = x_train_norm_col.flatten()
    x_test_norm[col] = x_test_norm_col.flatten()

# Optionally, convert the resulting DataFrames back to pandas DataFrame if needed
x_train_norm = pd.DataFrame(x_train_norm, columns=x_train.columns)
x_test_norm = pd.DataFrame(x_test_norm, columns=x_test.columns)
In [ ]:
x_train_norm.head()
Out[ ]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi sc_a
1264 0.116900 0 0.68 0 0.052632 1 0.596774 0.111111 0.033333 1 ... 0.440816 0.341121 0.483164 0.571429 0.388889 0.222222 1 0 0 0.266082
722 0.437542 0 0.00 0 0.315789 0 0.951613 0.000000 0.075000 7 ... 0.619388 0.508678 0.411545 0.785714 0.333333 0.611111 0 1 1 0.280702
210 0.597194 0 0.52 1 0.000000 0 0.338710 0.000000 0.258333 3 ... 0.108673 0.136849 0.618653 0.428571 0.000000 0.611111 1 0 1 0.000000
252 0.138277 1 0.88 1 0.000000 1 0.983871 1.000000 0.958333 8 ... 0.039796 0.102804 0.511224 0.071429 0.277778 0.277778 1 1 1 0.087719
297 0.553106 1 0.20 0 0.052632 0 0.806452 0.888889 0.841667 6 ... 0.537245 0.639519 0.259754 0.214286 0.333333 0.222222 1 0 1 0.140351

5 rows × 21 columns

In [ ]:
x_train_norm.describe()
Out[ ]:
battery_power clock_speed fc int_memory m_dep mobile_wt pc px_height px_width ram sc_h sc_w talk_time sc_a
count 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000 1600.000000
mean 0.487032 0.409025 0.226579 0.486472 0.447639 0.500224 0.495250 0.330597 0.504592 0.495384 0.523973 0.319549 0.502118 0.233840
std 0.293788 0.326083 0.228478 0.291836 0.321516 0.295802 0.302635 0.227043 0.288759 0.291468 0.298242 0.239981 0.303594 0.221880
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.227121 0.080000 0.052632 0.225806 0.111111 0.241667 0.250000 0.144770 0.254339 0.247060 0.285714 0.111111 0.222222 0.058480
50% 0.474616 0.400000 0.157895 0.483871 0.444444 0.500000 0.500000 0.290816 0.500334 0.499599 0.500000 0.277778 0.500000 0.160819
75% 0.735471 0.680000 0.368421 0.741935 0.777778 0.750000 0.750000 0.482781 0.760681 0.750200 0.785714 0.500000 0.777778 0.350877
max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000

Standardization¶

This is for only education purposes. They aren't used for computation.

In [ ]:
standardizer = StandardScaler()
x_train_std = x_train.copy()
x_test_std = x_test.copy()

for col in numerical_features:
    # Reshape the data to a 2D array with a single column
    x_train_col = x_train[col].values.reshape(-1, 1)
    x_test_col = x_test[col].values.reshape(-1, 1)
    
    # Fit and transform the training data
    x_train_std_col = standardizer.fit_transform(x_train_col)
    # Transform the testing data (using the same scaler fitted on training data)
    x_test_std_col = standardizer.transform(x_test_col)
    
    # Assign the transformed data back to the original DataFrame
    x_train_std[col] = x_train_std_col.flatten()
    x_test_std[col] = x_test_std_col.flatten()

# Optionally, convert the resulting DataFrames back to pandas DataFrame if needed
x_train_std = pd.DataFrame(x_train_std, columns=x_train.columns)
x_test_std = pd.DataFrame(x_test_std, columns=x_test.columns)
In [ ]:
x_train_std.head()
Out[ ]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi sc_a
1264 -1.260254 0 0.831260 0 -0.761569 1 0.378079 -1.047019 -1.578881 1 ... 0.485609 -0.566291 -0.041939 0.159167 0.289031 -0.922230 1 0 0 0.145360
722 -0.168510 0 -1.254751 0 0.390578 0 1.594344 -1.392712 -1.437977 7 ... 1.272365 0.014156 -0.287736 0.877887 0.057459 0.359122 0 1 1 0.211272
210 0.375088 0 0.340434 1 -0.991998 0 -0.506478 -1.392712 -0.818000 3 ... -0.977756 -1.273927 0.423057 -0.319980 -1.331973 0.359122 1 0 1 -1.054233
252 -1.187471 1 1.444793 1 -0.991998 1 1.704914 1.718529 1.549185 8 ... -1.281219 -1.391866 0.054362 -1.517848 -0.174113 -0.739180 1 1 1 -0.658763
297 0.224973 1 -0.641219 0 -0.761569 0 1.096781 1.372835 1.154654 6 ... 0.910457 0.467413 -0.808678 -1.038701 0.057459 -0.922230 1 0 1 -0.421480

5 rows × 21 columns

In [ ]:
x_train_std.describe()
Out[ ]:
battery_power clock_speed fc int_memory m_dep mobile_wt pc px_height px_width ram sc_h sc_w talk_time sc_a
count 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03 1.600000e+03
mean -2.087219e-16 -3.197442e-16 6.883383e-17 -1.376677e-16 1.132427e-16 2.942091e-16 1.088019e-16 -3.996803e-17 -1.953993e-16 2.997602e-17 -7.327472e-17 -8.881784e-18 -1.576517e-16 8.437695e-17
std 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00 1.000313e+00
min -1.658286e+00 -1.254751e+00 -9.919980e-01 -1.667458e+00 -1.392712e+00 -1.691604e+00 -1.636973e+00 -1.456553e+00 -1.747996e+00 -1.700150e+00 -1.757421e+00 -1.331973e+00 -1.654431e+00 -1.054233e+00
25% -8.849670e-01 -1.009338e+00 -7.615688e-01 -8.934712e-01 -1.047019e+00 -8.743613e-01 -8.106361e-01 -8.187189e-01 -8.669204e-01 -8.522427e-01 -7.991273e-01 -8.688293e-01 -9.222299e-01 -7.905859e-01
50% -4.227667e-02 -2.768567e-02 -3.007102e-01 -8.914672e-03 -9.938687e-03 -7.573582e-04 1.570039e-02 -1.752653e-01 -1.475109e-02 1.446578e-02 -8.040685e-02 -1.741132e-01 -6.978789e-03 -3.292041e-01
75% 8.459030e-01 8.312604e-01 6.210069e-01 8.756419e-01 1.027142e+00 8.446658e-01 8.420369e-01 6.704970e-01 8.871378e-01 8.745250e-01 8.778871e-01 7.521749e-01 9.082723e-01 5.276477e-01
max 1.746592e+00 1.812913e+00 3.386158e+00 1.760198e+00 1.718529e+00 1.690089e+00 1.668373e+00 2.949278e+00 1.716182e+00 1.731833e+00 1.596608e+00 2.836323e+00 1.640473e+00 3.454126e+00

Now, we have

x_train_norm, x_test_norm and x_train_std, x_test_std

Mul¶

Decision Tree Classification¶

In [ ]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
In [ ]:
dt = DecisionTreeClassifier(criterion='gini',max_depth=20,random_state=15)
In [ ]:
def evaluate_model(dt_classifier):
    print("Train Accuracy :", accuracy_score(y_train, dt_classifier.predict(x_train)))
    print("Train Confusion Matrix:")
    print(confusion_matrix(y_train, dt_classifier.predict(x_train)))
    print("-"*50)
    print("Test Accuracy :", accuracy_score(y_test, dt_classifier.predict(x_test)))
    print("Test Confusion Matrix:")
    print(confusion_matrix(y_test, dt_classifier.predict(x_test)))
In [ ]:
dt.fit(x_train,y_train)
Out[ ]:
DecisionTreeClassifier(max_depth=20, random_state=15)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(max_depth=20, random_state=15)
In [ ]:
evaluate_model(dt)
Train Accuracy : 1.0
Train Confusion Matrix:
[[413   0   0   0]
 [  0 400   0   0]
 [  0   0 384   0]
 [  0   0   0 403]]
--------------------------------------------------
Test Accuracy : 0.815
Test Confusion Matrix:
[[78  9  0  0]
 [11 78 11  0]
 [ 0 15 86 15]
 [ 0  0 13 84]]

Hyperparameter tuning¶

In [ ]:
params = {
    'max_depth': [1,2, 3, 5, 10,15, 20],
    'min_samples_leaf': [5, 10, 20, 50, 100],
    'criterion': ["gini", "entropy"]
}
grid_search = GridSearchCV(estimator=dt, 
                           param_grid=params, 
                           cv=4, n_jobs=-1, verbose=1, scoring = "accuracy")
In [ ]:
grid_search.fit(x_train, y_train)
Fitting 4 folds for each of 70 candidates, totalling 280 fits
Out[ ]:
GridSearchCV(cv=4,
             estimator=DecisionTreeClassifier(max_depth=20, random_state=15),
             n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 5, 10, 15, 20],
                         'min_samples_leaf': [5, 10, 20, 50, 100]},
             scoring='accuracy', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=4,
             estimator=DecisionTreeClassifier(max_depth=20, random_state=15),
             n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 5, 10, 15, 20],
                         'min_samples_leaf': [5, 10, 20, 50, 100]},
             scoring='accuracy', verbose=1)
DecisionTreeClassifier(max_depth=20, random_state=15)
DecisionTreeClassifier(max_depth=20, random_state=15)
In [ ]:
grid_search.best_estimator_
Out[ ]:
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=5,
                       random_state=15)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=5,
                       random_state=15)
In [ ]:
dt_best = grid_search.best_estimator_
In [ ]:
dt_best = grid_search.best_estimator_
evaluate_model(dt_best)
Train Accuracy : 0.96
Train Confusion Matrix:
[[401  12   0   0]
 [  8 385   7   0]
 [  0  11 359  14]
 [  0   0  12 391]]
--------------------------------------------------
Test Accuracy : 0.885
Test Confusion Matrix:
[[83  4  0  0]
 [ 9 86  5  0]
 [ 0  8 96 12]
 [ 0  0  8 89]]

XGBoost¶

In [ ]:
import xgboost as xgb
In [ ]:
'''# Python API Interface
dtrain_clf = xgb.DMatrix(x_train, y_train, enable_categorical=True)
dtest_clf = xgb.DMatrix(x_test, y_test, enable_categorical=True)
params = {"objective": "multi:softprob", "tree_method": "hist", "num_class": 4, "nthread": 8, "eval_metric":'auc'}
num_round = 1000

bst = xgb.train(params, dtrain_clf, num_round)

results = xgb.cv(
   params, dtrain_clf,
   num_boost_round=num_round,
   nfold=5,
   metrics=["mlogloss", "auc", "merror"],
)
y_prediction = bst.predict(dtest_clf)
y_prediction
results.keys()
results['test-auc-mean'].max()'''
Out[ ]:
'# Python API Interface\ndtrain_clf = xgb.DMatrix(x_train, y_train, enable_categorical=True)\ndtest_clf = xgb.DMatrix(x_test, y_test, enable_categorical=True)\nparams = {"objective": "multi:softprob", "tree_method": "hist", "num_class": 4, "nthread": 8, "eval_metric":\'auc\'}\nnum_round = 1000\n\nbst = xgb.train(params, dtrain_clf, num_round)\n\nresults = xgb.cv(\n   params, dtrain_clf,\n   num_boost_round=num_round,\n   nfold=5,\n   metrics=["mlogloss", "auc", "merror"],\n)\ny_prediction = bst.predict(dtest_clf)\ny_prediction\nresults.keys()\nresults[\'test-auc-mean\'].max()'
In [ ]:
model = xgb.XGBClassifier(n_estimators=1000, learning_rate=0.01,objective="multi:softmax",num_class=4, enable_categorical=True)
model.fit(x_train, y_train, verbose=True, eval_set=[(x_test, y_test)])
[0]	validation_0-mlogloss:1.37237
[1]	validation_0-mlogloss:1.35879
[2]	validation_0-mlogloss:1.34547
[3]	validation_0-mlogloss:1.33245
[4]	validation_0-mlogloss:1.31976
[5]	validation_0-mlogloss:1.30715
[6]	validation_0-mlogloss:1.29477
[7]	validation_0-mlogloss:1.28274
[8]	validation_0-mlogloss:1.27083
[9]	validation_0-mlogloss:1.25943
[10]	validation_0-mlogloss:1.24816
[11]	validation_0-mlogloss:1.23710
[12]	validation_0-mlogloss:1.22641
[13]	validation_0-mlogloss:1.21595
[14]	validation_0-mlogloss:1.20550
[15]	validation_0-mlogloss:1.19525
[16]	validation_0-mlogloss:1.18516
[17]	validation_0-mlogloss:1.17518
[18]	validation_0-mlogloss:1.16540
[19]	validation_0-mlogloss:1.15589
[20]	validation_0-mlogloss:1.14627
[21]	validation_0-mlogloss:1.13708
[22]	validation_0-mlogloss:1.12778
[23]	validation_0-mlogloss:1.11886
[24]	validation_0-mlogloss:1.11000
[25]	validation_0-mlogloss:1.10122
[26]	validation_0-mlogloss:1.09250
[27]	validation_0-mlogloss:1.08414
[28]	validation_0-mlogloss:1.07573
[29]	validation_0-mlogloss:1.06745
[30]	validation_0-mlogloss:1.05945
[31]	validation_0-mlogloss:1.05139
[32]	validation_0-mlogloss:1.04354
[33]	validation_0-mlogloss:1.03579
[34]	validation_0-mlogloss:1.02822
[35]	validation_0-mlogloss:1.02072
[36]	validation_0-mlogloss:1.01343
[37]	validation_0-mlogloss:1.00629
[38]	validation_0-mlogloss:0.99918
[39]	validation_0-mlogloss:0.99225
[40]	validation_0-mlogloss:0.98545
[41]	validation_0-mlogloss:0.97869
[42]	validation_0-mlogloss:0.97195
[43]	validation_0-mlogloss:0.96523
[44]	validation_0-mlogloss:0.95869
[45]	validation_0-mlogloss:0.95224
[46]	validation_0-mlogloss:0.94588
[47]	validation_0-mlogloss:0.93961
[48]	validation_0-mlogloss:0.93341
[49]	validation_0-mlogloss:0.92719
[50]	validation_0-mlogloss:0.92083
[51]	validation_0-mlogloss:0.91468
[52]	validation_0-mlogloss:0.90871
[53]	validation_0-mlogloss:0.90287
[54]	validation_0-mlogloss:0.89689
[55]	validation_0-mlogloss:0.89106
[56]	validation_0-mlogloss:0.88538
[57]	validation_0-mlogloss:0.87960
[58]	validation_0-mlogloss:0.87397
[59]	validation_0-mlogloss:0.86852
[60]	validation_0-mlogloss:0.86296
[61]	validation_0-mlogloss:0.85748
[62]	validation_0-mlogloss:0.85194
[63]	validation_0-mlogloss:0.84620
[64]	validation_0-mlogloss:0.84091
[65]	validation_0-mlogloss:0.83558
[66]	validation_0-mlogloss:0.83024
[67]	validation_0-mlogloss:0.82487
[68]	validation_0-mlogloss:0.81983
[69]	validation_0-mlogloss:0.81466
[70]	validation_0-mlogloss:0.80959
[71]	validation_0-mlogloss:0.80459
[72]	validation_0-mlogloss:0.79963
[73]	validation_0-mlogloss:0.79470
[74]	validation_0-mlogloss:0.78989
[75]	validation_0-mlogloss:0.78518
[76]	validation_0-mlogloss:0.78033
[77]	validation_0-mlogloss:0.77563
[78]	validation_0-mlogloss:0.77102
[79]	validation_0-mlogloss:0.76632
[80]	validation_0-mlogloss:0.76168
[81]	validation_0-mlogloss:0.75713
[82]	validation_0-mlogloss:0.75272
[83]	validation_0-mlogloss:0.74828
[84]	validation_0-mlogloss:0.74393
[85]	validation_0-mlogloss:0.73961
[86]	validation_0-mlogloss:0.73529
[87]	validation_0-mlogloss:0.73096
[88]	validation_0-mlogloss:0.72678
[89]	validation_0-mlogloss:0.72269
[90]	validation_0-mlogloss:0.71851
[91]	validation_0-mlogloss:0.71440
[92]	validation_0-mlogloss:0.71032
[93]	validation_0-mlogloss:0.70633
[94]	validation_0-mlogloss:0.70209
[95]	validation_0-mlogloss:0.69822
[96]	validation_0-mlogloss:0.69410
[97]	validation_0-mlogloss:0.68998
[98]	validation_0-mlogloss:0.68588
[99]	validation_0-mlogloss:0.68224
[100]	validation_0-mlogloss:0.67843
[101]	validation_0-mlogloss:0.67467
[102]	validation_0-mlogloss:0.67092
[103]	validation_0-mlogloss:0.66720
[104]	validation_0-mlogloss:0.66346
[105]	validation_0-mlogloss:0.65987
[106]	validation_0-mlogloss:0.65634
[107]	validation_0-mlogloss:0.65282
[108]	validation_0-mlogloss:0.64950
[109]	validation_0-mlogloss:0.64607
[110]	validation_0-mlogloss:0.64268
[111]	validation_0-mlogloss:0.63944
[112]	validation_0-mlogloss:0.63602
[113]	validation_0-mlogloss:0.63280
[114]	validation_0-mlogloss:0.62956
[115]	validation_0-mlogloss:0.62633
[116]	validation_0-mlogloss:0.62308
[117]	validation_0-mlogloss:0.61983
[118]	validation_0-mlogloss:0.61666
[119]	validation_0-mlogloss:0.61362
[120]	validation_0-mlogloss:0.61057
[121]	validation_0-mlogloss:0.60743
[122]	validation_0-mlogloss:0.60449
[123]	validation_0-mlogloss:0.60145
[124]	validation_0-mlogloss:0.59845
[125]	validation_0-mlogloss:0.59564
[126]	validation_0-mlogloss:0.59264
[127]	validation_0-mlogloss:0.58973
[128]	validation_0-mlogloss:0.58691
[129]	validation_0-mlogloss:0.58409
[130]	validation_0-mlogloss:0.58126
[131]	validation_0-mlogloss:0.57854
[132]	validation_0-mlogloss:0.57586
[133]	validation_0-mlogloss:0.57319
[134]	validation_0-mlogloss:0.57052
[135]	validation_0-mlogloss:0.56776
[136]	validation_0-mlogloss:0.56509
[137]	validation_0-mlogloss:0.56248
[138]	validation_0-mlogloss:0.55989
[139]	validation_0-mlogloss:0.55722
[140]	validation_0-mlogloss:0.55464
[141]	validation_0-mlogloss:0.55213
[142]	validation_0-mlogloss:0.54965
[143]	validation_0-mlogloss:0.54705
[144]	validation_0-mlogloss:0.54469
[145]	validation_0-mlogloss:0.54235
[146]	validation_0-mlogloss:0.54011
[147]	validation_0-mlogloss:0.53793
[148]	validation_0-mlogloss:0.53580
[149]	validation_0-mlogloss:0.53356
[150]	validation_0-mlogloss:0.53145
[151]	validation_0-mlogloss:0.52899
[152]	validation_0-mlogloss:0.52683
[153]	validation_0-mlogloss:0.52472
[154]	validation_0-mlogloss:0.52247
[155]	validation_0-mlogloss:0.52039
[156]	validation_0-mlogloss:0.51822
[157]	validation_0-mlogloss:0.51599
[158]	validation_0-mlogloss:0.51393
[159]	validation_0-mlogloss:0.51174
[160]	validation_0-mlogloss:0.50962
[161]	validation_0-mlogloss:0.50746
[162]	validation_0-mlogloss:0.50545
[163]	validation_0-mlogloss:0.50354
[164]	validation_0-mlogloss:0.50140
[165]	validation_0-mlogloss:0.49944
[166]	validation_0-mlogloss:0.49750
[167]	validation_0-mlogloss:0.49567
[168]	validation_0-mlogloss:0.49360
[169]	validation_0-mlogloss:0.49176
[170]	validation_0-mlogloss:0.48979
[171]	validation_0-mlogloss:0.48799
[172]	validation_0-mlogloss:0.48619
[173]	validation_0-mlogloss:0.48439
[174]	validation_0-mlogloss:0.48242
[175]	validation_0-mlogloss:0.48069
[176]	validation_0-mlogloss:0.47890
[177]	validation_0-mlogloss:0.47721
[178]	validation_0-mlogloss:0.47531
[179]	validation_0-mlogloss:0.47364
[180]	validation_0-mlogloss:0.47198
[181]	validation_0-mlogloss:0.47038
[182]	validation_0-mlogloss:0.46870
[183]	validation_0-mlogloss:0.46709
[184]	validation_0-mlogloss:0.46549
[185]	validation_0-mlogloss:0.46384
[186]	validation_0-mlogloss:0.46221
[187]	validation_0-mlogloss:0.46064
[188]	validation_0-mlogloss:0.45896
[189]	validation_0-mlogloss:0.45740
[190]	validation_0-mlogloss:0.45591
[191]	validation_0-mlogloss:0.45430
[192]	validation_0-mlogloss:0.45276
[193]	validation_0-mlogloss:0.45123
[194]	validation_0-mlogloss:0.44974
[195]	validation_0-mlogloss:0.44831
[196]	validation_0-mlogloss:0.44680
[197]	validation_0-mlogloss:0.44532
[198]	validation_0-mlogloss:0.44394
[199]	validation_0-mlogloss:0.44247
[200]	validation_0-mlogloss:0.44104
[201]	validation_0-mlogloss:0.43972
[202]	validation_0-mlogloss:0.43829
[203]	validation_0-mlogloss:0.43687
[204]	validation_0-mlogloss:0.43538
[205]	validation_0-mlogloss:0.43403
[206]	validation_0-mlogloss:0.43251
[207]	validation_0-mlogloss:0.43121
[208]	validation_0-mlogloss:0.42985
[209]	validation_0-mlogloss:0.42857
[210]	validation_0-mlogloss:0.42713
[211]	validation_0-mlogloss:0.42588
[212]	validation_0-mlogloss:0.42460
[213]	validation_0-mlogloss:0.42318
[214]	validation_0-mlogloss:0.42185
[215]	validation_0-mlogloss:0.42044
[216]	validation_0-mlogloss:0.41918
[217]	validation_0-mlogloss:0.41784
[218]	validation_0-mlogloss:0.41660
[219]	validation_0-mlogloss:0.41530
[220]	validation_0-mlogloss:0.41412
[221]	validation_0-mlogloss:0.41284
[222]	validation_0-mlogloss:0.41154
[223]	validation_0-mlogloss:0.41020
[224]	validation_0-mlogloss:0.40896
[225]	validation_0-mlogloss:0.40749
[226]	validation_0-mlogloss:0.40595
[227]	validation_0-mlogloss:0.40448
[228]	validation_0-mlogloss:0.40303
[229]	validation_0-mlogloss:0.40153
[230]	validation_0-mlogloss:0.40011
[231]	validation_0-mlogloss:0.39869
[232]	validation_0-mlogloss:0.39725
[233]	validation_0-mlogloss:0.39582
[234]	validation_0-mlogloss:0.39441
[235]	validation_0-mlogloss:0.39304
[236]	validation_0-mlogloss:0.39168
[237]	validation_0-mlogloss:0.39029
[238]	validation_0-mlogloss:0.38898
[239]	validation_0-mlogloss:0.38757
[240]	validation_0-mlogloss:0.38627
[241]	validation_0-mlogloss:0.38487
[242]	validation_0-mlogloss:0.38372
[243]	validation_0-mlogloss:0.38253
[244]	validation_0-mlogloss:0.38124
[245]	validation_0-mlogloss:0.38008
[246]	validation_0-mlogloss:0.37891
[247]	validation_0-mlogloss:0.37769
[248]	validation_0-mlogloss:0.37648
[249]	validation_0-mlogloss:0.37534
[250]	validation_0-mlogloss:0.37433
[251]	validation_0-mlogloss:0.37335
[252]	validation_0-mlogloss:0.37231
[253]	validation_0-mlogloss:0.37133
[254]	validation_0-mlogloss:0.37023
[255]	validation_0-mlogloss:0.36927
[256]	validation_0-mlogloss:0.36835
[257]	validation_0-mlogloss:0.36745
[258]	validation_0-mlogloss:0.36651
[259]	validation_0-mlogloss:0.36543
[260]	validation_0-mlogloss:0.36446
[261]	validation_0-mlogloss:0.36337
[262]	validation_0-mlogloss:0.36245
[263]	validation_0-mlogloss:0.36153
[264]	validation_0-mlogloss:0.36059
[265]	validation_0-mlogloss:0.35960
[266]	validation_0-mlogloss:0.35872
[267]	validation_0-mlogloss:0.35775
[268]	validation_0-mlogloss:0.35685
[269]	validation_0-mlogloss:0.35589
[270]	validation_0-mlogloss:0.35506
[271]	validation_0-mlogloss:0.35417
[272]	validation_0-mlogloss:0.35337
[273]	validation_0-mlogloss:0.35253
[274]	validation_0-mlogloss:0.35161
[275]	validation_0-mlogloss:0.35076
[276]	validation_0-mlogloss:0.34993
[277]	validation_0-mlogloss:0.34897
[278]	validation_0-mlogloss:0.34818
[279]	validation_0-mlogloss:0.34726
[280]	validation_0-mlogloss:0.34640
[281]	validation_0-mlogloss:0.34552
[282]	validation_0-mlogloss:0.34476
[283]	validation_0-mlogloss:0.34384
[284]	validation_0-mlogloss:0.34291
[285]	validation_0-mlogloss:0.34218
[286]	validation_0-mlogloss:0.34145
[287]	validation_0-mlogloss:0.34056
[288]	validation_0-mlogloss:0.33971
[289]	validation_0-mlogloss:0.33889
[290]	validation_0-mlogloss:0.33822
[291]	validation_0-mlogloss:0.33732
[292]	validation_0-mlogloss:0.33649
[293]	validation_0-mlogloss:0.33574
[294]	validation_0-mlogloss:0.33484
[295]	validation_0-mlogloss:0.33410
[296]	validation_0-mlogloss:0.33341
[297]	validation_0-mlogloss:0.33263
[298]	validation_0-mlogloss:0.33184
[299]	validation_0-mlogloss:0.33123
[300]	validation_0-mlogloss:0.33039
[301]	validation_0-mlogloss:0.32967
[302]	validation_0-mlogloss:0.32882
[303]	validation_0-mlogloss:0.32819
[304]	validation_0-mlogloss:0.32740
[305]	validation_0-mlogloss:0.32671
[306]	validation_0-mlogloss:0.32588
[307]	validation_0-mlogloss:0.32517
[308]	validation_0-mlogloss:0.32440
[309]	validation_0-mlogloss:0.32379
[310]	validation_0-mlogloss:0.32326
[311]	validation_0-mlogloss:0.32251
[312]	validation_0-mlogloss:0.32186
[313]	validation_0-mlogloss:0.32126
[314]	validation_0-mlogloss:0.32075
[315]	validation_0-mlogloss:0.32007
[316]	validation_0-mlogloss:0.31938
[317]	validation_0-mlogloss:0.31876
[318]	validation_0-mlogloss:0.31808
[319]	validation_0-mlogloss:0.31751
[320]	validation_0-mlogloss:0.31687
[321]	validation_0-mlogloss:0.31621
[322]	validation_0-mlogloss:0.31565
[323]	validation_0-mlogloss:0.31502
[324]	validation_0-mlogloss:0.31438
[325]	validation_0-mlogloss:0.31375
[326]	validation_0-mlogloss:0.31314
[327]	validation_0-mlogloss:0.31259
[328]	validation_0-mlogloss:0.31199
[329]	validation_0-mlogloss:0.31149
[330]	validation_0-mlogloss:0.31084
[331]	validation_0-mlogloss:0.31028
[332]	validation_0-mlogloss:0.30962
[333]	validation_0-mlogloss:0.30907
[334]	validation_0-mlogloss:0.30848
[335]	validation_0-mlogloss:0.30797
[336]	validation_0-mlogloss:0.30740
[337]	validation_0-mlogloss:0.30684
[338]	validation_0-mlogloss:0.30621
[339]	validation_0-mlogloss:0.30564
[340]	validation_0-mlogloss:0.30507
[341]	validation_0-mlogloss:0.30448
[342]	validation_0-mlogloss:0.30389
[343]	validation_0-mlogloss:0.30334
[344]	validation_0-mlogloss:0.30274
[345]	validation_0-mlogloss:0.30219
[346]	validation_0-mlogloss:0.30158
[347]	validation_0-mlogloss:0.30103
[348]	validation_0-mlogloss:0.30045
[349]	validation_0-mlogloss:0.29998
[350]	validation_0-mlogloss:0.29950
[351]	validation_0-mlogloss:0.29892
[352]	validation_0-mlogloss:0.29835
[353]	validation_0-mlogloss:0.29779
[354]	validation_0-mlogloss:0.29719
[355]	validation_0-mlogloss:0.29665
[356]	validation_0-mlogloss:0.29607
[357]	validation_0-mlogloss:0.29547
[358]	validation_0-mlogloss:0.29496
[359]	validation_0-mlogloss:0.29445
[360]	validation_0-mlogloss:0.29400
[361]	validation_0-mlogloss:0.29345
[362]	validation_0-mlogloss:0.29300
[363]	validation_0-mlogloss:0.29243
[364]	validation_0-mlogloss:0.29194
[365]	validation_0-mlogloss:0.29152
[366]	validation_0-mlogloss:0.29093
[367]	validation_0-mlogloss:0.29041
[368]	validation_0-mlogloss:0.28981
[369]	validation_0-mlogloss:0.28940
[370]	validation_0-mlogloss:0.28897
[371]	validation_0-mlogloss:0.28861
[372]	validation_0-mlogloss:0.28806
[373]	validation_0-mlogloss:0.28773
[374]	validation_0-mlogloss:0.28737
[375]	validation_0-mlogloss:0.28679
[376]	validation_0-mlogloss:0.28636
[377]	validation_0-mlogloss:0.28594
[378]	validation_0-mlogloss:0.28541
[379]	validation_0-mlogloss:0.28507
[380]	validation_0-mlogloss:0.28472
[381]	validation_0-mlogloss:0.28432
[382]	validation_0-mlogloss:0.28388
[383]	validation_0-mlogloss:0.28347
[384]	validation_0-mlogloss:0.28306
[385]	validation_0-mlogloss:0.28267
[386]	validation_0-mlogloss:0.28219
[387]	validation_0-mlogloss:0.28186
[388]	validation_0-mlogloss:0.28144
[389]	validation_0-mlogloss:0.28109
[390]	validation_0-mlogloss:0.28079
[391]	validation_0-mlogloss:0.28041
[392]	validation_0-mlogloss:0.28013
[393]	validation_0-mlogloss:0.27981
[394]	validation_0-mlogloss:0.27940
[395]	validation_0-mlogloss:0.27902
[396]	validation_0-mlogloss:0.27866
[397]	validation_0-mlogloss:0.27833
[398]	validation_0-mlogloss:0.27789
[399]	validation_0-mlogloss:0.27760
[400]	validation_0-mlogloss:0.27729
[401]	validation_0-mlogloss:0.27694
[402]	validation_0-mlogloss:0.27649
[403]	validation_0-mlogloss:0.27601
[404]	validation_0-mlogloss:0.27556
[405]	validation_0-mlogloss:0.27512
[406]	validation_0-mlogloss:0.27470
[407]	validation_0-mlogloss:0.27427
[408]	validation_0-mlogloss:0.27385
[409]	validation_0-mlogloss:0.27343
[410]	validation_0-mlogloss:0.27297
[411]	validation_0-mlogloss:0.27261
[412]	validation_0-mlogloss:0.27215
[413]	validation_0-mlogloss:0.27175
[414]	validation_0-mlogloss:0.27139
[415]	validation_0-mlogloss:0.27100
[416]	validation_0-mlogloss:0.27069
[417]	validation_0-mlogloss:0.27026
[418]	validation_0-mlogloss:0.26988
[419]	validation_0-mlogloss:0.26953
[420]	validation_0-mlogloss:0.26916
[421]	validation_0-mlogloss:0.26876
[422]	validation_0-mlogloss:0.26847
[423]	validation_0-mlogloss:0.26806
[424]	validation_0-mlogloss:0.26775
[425]	validation_0-mlogloss:0.26741
[426]	validation_0-mlogloss:0.26703
[427]	validation_0-mlogloss:0.26674
[428]	validation_0-mlogloss:0.26630
[429]	validation_0-mlogloss:0.26601
[430]	validation_0-mlogloss:0.26566
[431]	validation_0-mlogloss:0.26539
[432]	validation_0-mlogloss:0.26515
[433]	validation_0-mlogloss:0.26485
[434]	validation_0-mlogloss:0.26450
[435]	validation_0-mlogloss:0.26421
[436]	validation_0-mlogloss:0.26390
[437]	validation_0-mlogloss:0.26359
[438]	validation_0-mlogloss:0.26331
[439]	validation_0-mlogloss:0.26303
[440]	validation_0-mlogloss:0.26275
[441]	validation_0-mlogloss:0.26247
[442]	validation_0-mlogloss:0.26220
[443]	validation_0-mlogloss:0.26184
[444]	validation_0-mlogloss:0.26155
[445]	validation_0-mlogloss:0.26118
[446]	validation_0-mlogloss:0.26090
[447]	validation_0-mlogloss:0.26059
[448]	validation_0-mlogloss:0.26030
[449]	validation_0-mlogloss:0.26003
[450]	validation_0-mlogloss:0.25975
[451]	validation_0-mlogloss:0.25945
[452]	validation_0-mlogloss:0.25916
[453]	validation_0-mlogloss:0.25877
[454]	validation_0-mlogloss:0.25854
[455]	validation_0-mlogloss:0.25828
[456]	validation_0-mlogloss:0.25789
[457]	validation_0-mlogloss:0.25752
[458]	validation_0-mlogloss:0.25721
[459]	validation_0-mlogloss:0.25690
[460]	validation_0-mlogloss:0.25667
[461]	validation_0-mlogloss:0.25631
[462]	validation_0-mlogloss:0.25604
[463]	validation_0-mlogloss:0.25577
[464]	validation_0-mlogloss:0.25557
[465]	validation_0-mlogloss:0.25529
[466]	validation_0-mlogloss:0.25508
[467]	validation_0-mlogloss:0.25480
[468]	validation_0-mlogloss:0.25449
[469]	validation_0-mlogloss:0.25430
[470]	validation_0-mlogloss:0.25401
[471]	validation_0-mlogloss:0.25378
[472]	validation_0-mlogloss:0.25355
[473]	validation_0-mlogloss:0.25328
[474]	validation_0-mlogloss:0.25307
[475]	validation_0-mlogloss:0.25287
[476]	validation_0-mlogloss:0.25261
[477]	validation_0-mlogloss:0.25233
[478]	validation_0-mlogloss:0.25209
[479]	validation_0-mlogloss:0.25191
[480]	validation_0-mlogloss:0.25173
[481]	validation_0-mlogloss:0.25148
[482]	validation_0-mlogloss:0.25125
[483]	validation_0-mlogloss:0.25103
[484]	validation_0-mlogloss:0.25079
[485]	validation_0-mlogloss:0.25057
[486]	validation_0-mlogloss:0.25032
[487]	validation_0-mlogloss:0.25014
[488]	validation_0-mlogloss:0.24989
[489]	validation_0-mlogloss:0.24971
[490]	validation_0-mlogloss:0.24947
[491]	validation_0-mlogloss:0.24920
[492]	validation_0-mlogloss:0.24898
[493]	validation_0-mlogloss:0.24875
[494]	validation_0-mlogloss:0.24855
[495]	validation_0-mlogloss:0.24834
[496]	validation_0-mlogloss:0.24817
[497]	validation_0-mlogloss:0.24796
[498]	validation_0-mlogloss:0.24777
[499]	validation_0-mlogloss:0.24755
[500]	validation_0-mlogloss:0.24731
[501]	validation_0-mlogloss:0.24710
[502]	validation_0-mlogloss:0.24689
[503]	validation_0-mlogloss:0.24662
[504]	validation_0-mlogloss:0.24648
[505]	validation_0-mlogloss:0.24629
[506]	validation_0-mlogloss:0.24604
[507]	validation_0-mlogloss:0.24586
[508]	validation_0-mlogloss:0.24561
[509]	validation_0-mlogloss:0.24544
[510]	validation_0-mlogloss:0.24530
[511]	validation_0-mlogloss:0.24508
[512]	validation_0-mlogloss:0.24490
[513]	validation_0-mlogloss:0.24474
[514]	validation_0-mlogloss:0.24452
[515]	validation_0-mlogloss:0.24431
[516]	validation_0-mlogloss:0.24414
[517]	validation_0-mlogloss:0.24393
[518]	validation_0-mlogloss:0.24376
[519]	validation_0-mlogloss:0.24356
[520]	validation_0-mlogloss:0.24337
[521]	validation_0-mlogloss:0.24315
[522]	validation_0-mlogloss:0.24298
[523]	validation_0-mlogloss:0.24277
[524]	validation_0-mlogloss:0.24260
[525]	validation_0-mlogloss:0.24239
[526]	validation_0-mlogloss:0.24218
[527]	validation_0-mlogloss:0.24198
[528]	validation_0-mlogloss:0.24179
[529]	validation_0-mlogloss:0.24159
[530]	validation_0-mlogloss:0.24141
[531]	validation_0-mlogloss:0.24123
[532]	validation_0-mlogloss:0.24107
[533]	validation_0-mlogloss:0.24089
[534]	validation_0-mlogloss:0.24068
[535]	validation_0-mlogloss:0.24051
[536]	validation_0-mlogloss:0.24033
[537]	validation_0-mlogloss:0.24016
[538]	validation_0-mlogloss:0.23997
[539]	validation_0-mlogloss:0.23978
[540]	validation_0-mlogloss:0.23959
[541]	validation_0-mlogloss:0.23940
[542]	validation_0-mlogloss:0.23931
[543]	validation_0-mlogloss:0.23912
[544]	validation_0-mlogloss:0.23896
[545]	validation_0-mlogloss:0.23880
[546]	validation_0-mlogloss:0.23862
[547]	validation_0-mlogloss:0.23847
[548]	validation_0-mlogloss:0.23829
[549]	validation_0-mlogloss:0.23814
[550]	validation_0-mlogloss:0.23796
[551]	validation_0-mlogloss:0.23777
[552]	validation_0-mlogloss:0.23760
[553]	validation_0-mlogloss:0.23738
[554]	validation_0-mlogloss:0.23716
[555]	validation_0-mlogloss:0.23698
[556]	validation_0-mlogloss:0.23679
[557]	validation_0-mlogloss:0.23663
[558]	validation_0-mlogloss:0.23650
[559]	validation_0-mlogloss:0.23639
[560]	validation_0-mlogloss:0.23624
[561]	validation_0-mlogloss:0.23606
[562]	validation_0-mlogloss:0.23591
[563]	validation_0-mlogloss:0.23577
[564]	validation_0-mlogloss:0.23566
[565]	validation_0-mlogloss:0.23546
[566]	validation_0-mlogloss:0.23531
[567]	validation_0-mlogloss:0.23513
[568]	validation_0-mlogloss:0.23503
[569]	validation_0-mlogloss:0.23490
[570]	validation_0-mlogloss:0.23480
[571]	validation_0-mlogloss:0.23464
[572]	validation_0-mlogloss:0.23452
[573]	validation_0-mlogloss:0.23440
[574]	validation_0-mlogloss:0.23430
[575]	validation_0-mlogloss:0.23416
[576]	validation_0-mlogloss:0.23398
[577]	validation_0-mlogloss:0.23382
[578]	validation_0-mlogloss:0.23374
[579]	validation_0-mlogloss:0.23353
[580]	validation_0-mlogloss:0.23337
[581]	validation_0-mlogloss:0.23323
[582]	validation_0-mlogloss:0.23313
[583]	validation_0-mlogloss:0.23301
[584]	validation_0-mlogloss:0.23283
[585]	validation_0-mlogloss:0.23266
[586]	validation_0-mlogloss:0.23249
[587]	validation_0-mlogloss:0.23238
[588]	validation_0-mlogloss:0.23220
[589]	validation_0-mlogloss:0.23207
[590]	validation_0-mlogloss:0.23195
[591]	validation_0-mlogloss:0.23188
[592]	validation_0-mlogloss:0.23171
[593]	validation_0-mlogloss:0.23163
[594]	validation_0-mlogloss:0.23146
[595]	validation_0-mlogloss:0.23134
[596]	validation_0-mlogloss:0.23123
[597]	validation_0-mlogloss:0.23107
[598]	validation_0-mlogloss:0.23099
[599]	validation_0-mlogloss:0.23085
[600]	validation_0-mlogloss:0.23075
[601]	validation_0-mlogloss:0.23067
[602]	validation_0-mlogloss:0.23049
[603]	validation_0-mlogloss:0.23043
[604]	validation_0-mlogloss:0.23036
[605]	validation_0-mlogloss:0.23026
[606]	validation_0-mlogloss:0.23016
[607]	validation_0-mlogloss:0.22999
[608]	validation_0-mlogloss:0.22985
[609]	validation_0-mlogloss:0.22968
[610]	validation_0-mlogloss:0.22954
[611]	validation_0-mlogloss:0.22940
[612]	validation_0-mlogloss:0.22927
[613]	validation_0-mlogloss:0.22914
[614]	validation_0-mlogloss:0.22902
[615]	validation_0-mlogloss:0.22891
[616]	validation_0-mlogloss:0.22875
[617]	validation_0-mlogloss:0.22869
[618]	validation_0-mlogloss:0.22855
[619]	validation_0-mlogloss:0.22842
[620]	validation_0-mlogloss:0.22835
[621]	validation_0-mlogloss:0.22821
[622]	validation_0-mlogloss:0.22811
[623]	validation_0-mlogloss:0.22800
[624]	validation_0-mlogloss:0.22788
[625]	validation_0-mlogloss:0.22781
[626]	validation_0-mlogloss:0.22769
[627]	validation_0-mlogloss:0.22762
[628]	validation_0-mlogloss:0.22755
[629]	validation_0-mlogloss:0.22748
[630]	validation_0-mlogloss:0.22738
[631]	validation_0-mlogloss:0.22726
[632]	validation_0-mlogloss:0.22713
[633]	validation_0-mlogloss:0.22707
[634]	validation_0-mlogloss:0.22694
[635]	validation_0-mlogloss:0.22678
[636]	validation_0-mlogloss:0.22664
[637]	validation_0-mlogloss:0.22646
[638]	validation_0-mlogloss:0.22636
[639]	validation_0-mlogloss:0.22632
[640]	validation_0-mlogloss:0.22617
[641]	validation_0-mlogloss:0.22606
[642]	validation_0-mlogloss:0.22602
[643]	validation_0-mlogloss:0.22590
[644]	validation_0-mlogloss:0.22574
[645]	validation_0-mlogloss:0.22563
[646]	validation_0-mlogloss:0.22555
[647]	validation_0-mlogloss:0.22541
[648]	validation_0-mlogloss:0.22524
[649]	validation_0-mlogloss:0.22513
[650]	validation_0-mlogloss:0.22501
[651]	validation_0-mlogloss:0.22484
[652]	validation_0-mlogloss:0.22476
[653]	validation_0-mlogloss:0.22468
[654]	validation_0-mlogloss:0.22453
[655]	validation_0-mlogloss:0.22443
[656]	validation_0-mlogloss:0.22437
[657]	validation_0-mlogloss:0.22424
[658]	validation_0-mlogloss:0.22411
[659]	validation_0-mlogloss:0.22402
[660]	validation_0-mlogloss:0.22392
[661]	validation_0-mlogloss:0.22376
[662]	validation_0-mlogloss:0.22369
[663]	validation_0-mlogloss:0.22357
[664]	validation_0-mlogloss:0.22353
[665]	validation_0-mlogloss:0.22342
[666]	validation_0-mlogloss:0.22337
[667]	validation_0-mlogloss:0.22327
[668]	validation_0-mlogloss:0.22316
[669]	validation_0-mlogloss:0.22307
[670]	validation_0-mlogloss:0.22302
[671]	validation_0-mlogloss:0.22294
[672]	validation_0-mlogloss:0.22287
[673]	validation_0-mlogloss:0.22282
[674]	validation_0-mlogloss:0.22278
[675]	validation_0-mlogloss:0.22273
[676]	validation_0-mlogloss:0.22261
[677]	validation_0-mlogloss:0.22254
[678]	validation_0-mlogloss:0.22247
[679]	validation_0-mlogloss:0.22244
[680]	validation_0-mlogloss:0.22234
[681]	validation_0-mlogloss:0.22224
[682]	validation_0-mlogloss:0.22216
[683]	validation_0-mlogloss:0.22211
[684]	validation_0-mlogloss:0.22205
[685]	validation_0-mlogloss:0.22206
[686]	validation_0-mlogloss:0.22203
[687]	validation_0-mlogloss:0.22192
[688]	validation_0-mlogloss:0.22185
[689]	validation_0-mlogloss:0.22175
[690]	validation_0-mlogloss:0.22171
[691]	validation_0-mlogloss:0.22168
[692]	validation_0-mlogloss:0.22162
[693]	validation_0-mlogloss:0.22156
[694]	validation_0-mlogloss:0.22150
[695]	validation_0-mlogloss:0.22145
[696]	validation_0-mlogloss:0.22137
[697]	validation_0-mlogloss:0.22133
[698]	validation_0-mlogloss:0.22123
[699]	validation_0-mlogloss:0.22117
[700]	validation_0-mlogloss:0.22112
[701]	validation_0-mlogloss:0.22107
[702]	validation_0-mlogloss:0.22102
[703]	validation_0-mlogloss:0.22098
[704]	validation_0-mlogloss:0.22088
[705]	validation_0-mlogloss:0.22082
[706]	validation_0-mlogloss:0.22077
[707]	validation_0-mlogloss:0.22073
[708]	validation_0-mlogloss:0.22063
[709]	validation_0-mlogloss:0.22058
[710]	validation_0-mlogloss:0.22052
[711]	validation_0-mlogloss:0.22052
[712]	validation_0-mlogloss:0.22051
[713]	validation_0-mlogloss:0.22042
[714]	validation_0-mlogloss:0.22035
[715]	validation_0-mlogloss:0.22032
[716]	validation_0-mlogloss:0.22027
[717]	validation_0-mlogloss:0.22019
[718]	validation_0-mlogloss:0.22019
[719]	validation_0-mlogloss:0.22011
[720]	validation_0-mlogloss:0.22000
[721]	validation_0-mlogloss:0.21994
[722]	validation_0-mlogloss:0.21991
[723]	validation_0-mlogloss:0.21981
[724]	validation_0-mlogloss:0.21980
[725]	validation_0-mlogloss:0.21974
[726]	validation_0-mlogloss:0.21968
[727]	validation_0-mlogloss:0.21967
[728]	validation_0-mlogloss:0.21956
[729]	validation_0-mlogloss:0.21952
[730]	validation_0-mlogloss:0.21939
[731]	validation_0-mlogloss:0.21936
[732]	validation_0-mlogloss:0.21934
[733]	validation_0-mlogloss:0.21926
[734]	validation_0-mlogloss:0.21914
[735]	validation_0-mlogloss:0.21906
[736]	validation_0-mlogloss:0.21901
[737]	validation_0-mlogloss:0.21895
[738]	validation_0-mlogloss:0.21888
[739]	validation_0-mlogloss:0.21877
[740]	validation_0-mlogloss:0.21870
[741]	validation_0-mlogloss:0.21865
[742]	validation_0-mlogloss:0.21862
[743]	validation_0-mlogloss:0.21853
[744]	validation_0-mlogloss:0.21847
[745]	validation_0-mlogloss:0.21842
[746]	validation_0-mlogloss:0.21833
[747]	validation_0-mlogloss:0.21827
[748]	validation_0-mlogloss:0.21818
[749]	validation_0-mlogloss:0.21810
[750]	validation_0-mlogloss:0.21803
[751]	validation_0-mlogloss:0.21799
[752]	validation_0-mlogloss:0.21792
[753]	validation_0-mlogloss:0.21788
[754]	validation_0-mlogloss:0.21782
[755]	validation_0-mlogloss:0.21776
[756]	validation_0-mlogloss:0.21776
[757]	validation_0-mlogloss:0.21769
[758]	validation_0-mlogloss:0.21763
[759]	validation_0-mlogloss:0.21759
[760]	validation_0-mlogloss:0.21756
[761]	validation_0-mlogloss:0.21755
[762]	validation_0-mlogloss:0.21750
[763]	validation_0-mlogloss:0.21744
[764]	validation_0-mlogloss:0.21741
[765]	validation_0-mlogloss:0.21735
[766]	validation_0-mlogloss:0.21730
[767]	validation_0-mlogloss:0.21726
[768]	validation_0-mlogloss:0.21722
[769]	validation_0-mlogloss:0.21716
[770]	validation_0-mlogloss:0.21709
[771]	validation_0-mlogloss:0.21706
[772]	validation_0-mlogloss:0.21700
[773]	validation_0-mlogloss:0.21694
[774]	validation_0-mlogloss:0.21687
[775]	validation_0-mlogloss:0.21685
[776]	validation_0-mlogloss:0.21681
[777]	validation_0-mlogloss:0.21673
[778]	validation_0-mlogloss:0.21668
[779]	validation_0-mlogloss:0.21668
[780]	validation_0-mlogloss:0.21664
[781]	validation_0-mlogloss:0.21658
[782]	validation_0-mlogloss:0.21657
[783]	validation_0-mlogloss:0.21652
[784]	validation_0-mlogloss:0.21648
[785]	validation_0-mlogloss:0.21643
[786]	validation_0-mlogloss:0.21635
[787]	validation_0-mlogloss:0.21629
[788]	validation_0-mlogloss:0.21622
[789]	validation_0-mlogloss:0.21614
[790]	validation_0-mlogloss:0.21604
[791]	validation_0-mlogloss:0.21602
[792]	validation_0-mlogloss:0.21592
[793]	validation_0-mlogloss:0.21586
[794]	validation_0-mlogloss:0.21579
[795]	validation_0-mlogloss:0.21573
[796]	validation_0-mlogloss:0.21575
[797]	validation_0-mlogloss:0.21578
[798]	validation_0-mlogloss:0.21577
[799]	validation_0-mlogloss:0.21571
[800]	validation_0-mlogloss:0.21568
[801]	validation_0-mlogloss:0.21562
[802]	validation_0-mlogloss:0.21560
[803]	validation_0-mlogloss:0.21559
[804]	validation_0-mlogloss:0.21553
[805]	validation_0-mlogloss:0.21545
[806]	validation_0-mlogloss:0.21539
[807]	validation_0-mlogloss:0.21535
[808]	validation_0-mlogloss:0.21529
[809]	validation_0-mlogloss:0.21526
[810]	validation_0-mlogloss:0.21521
[811]	validation_0-mlogloss:0.21516
[812]	validation_0-mlogloss:0.21515
[813]	validation_0-mlogloss:0.21508
[814]	validation_0-mlogloss:0.21505
[815]	validation_0-mlogloss:0.21505
[816]	validation_0-mlogloss:0.21501
[817]	validation_0-mlogloss:0.21497
[818]	validation_0-mlogloss:0.21492
[819]	validation_0-mlogloss:0.21490
[820]	validation_0-mlogloss:0.21489
[821]	validation_0-mlogloss:0.21488
[822]	validation_0-mlogloss:0.21488
[823]	validation_0-mlogloss:0.21485
[824]	validation_0-mlogloss:0.21482
[825]	validation_0-mlogloss:0.21479
[826]	validation_0-mlogloss:0.21480
[827]	validation_0-mlogloss:0.21476
[828]	validation_0-mlogloss:0.21474
[829]	validation_0-mlogloss:0.21469
[830]	validation_0-mlogloss:0.21465
[831]	validation_0-mlogloss:0.21467
[832]	validation_0-mlogloss:0.21464
[833]	validation_0-mlogloss:0.21460
[834]	validation_0-mlogloss:0.21461
[835]	validation_0-mlogloss:0.21457
[836]	validation_0-mlogloss:0.21450
[837]	validation_0-mlogloss:0.21444
[838]	validation_0-mlogloss:0.21441
[839]	validation_0-mlogloss:0.21438
[840]	validation_0-mlogloss:0.21434
[841]	validation_0-mlogloss:0.21432
[842]	validation_0-mlogloss:0.21431
[843]	validation_0-mlogloss:0.21424
[844]	validation_0-mlogloss:0.21430
[845]	validation_0-mlogloss:0.21427
[846]	validation_0-mlogloss:0.21424
[847]	validation_0-mlogloss:0.21421
[848]	validation_0-mlogloss:0.21418
[849]	validation_0-mlogloss:0.21413
[850]	validation_0-mlogloss:0.21408
[851]	validation_0-mlogloss:0.21408
[852]	validation_0-mlogloss:0.21408
[853]	validation_0-mlogloss:0.21405
[854]	validation_0-mlogloss:0.21405
[855]	validation_0-mlogloss:0.21399
[856]	validation_0-mlogloss:0.21395
[857]	validation_0-mlogloss:0.21390
[858]	validation_0-mlogloss:0.21387
[859]	validation_0-mlogloss:0.21381
[860]	validation_0-mlogloss:0.21377
[861]	validation_0-mlogloss:0.21372
[862]	validation_0-mlogloss:0.21369
[863]	validation_0-mlogloss:0.21366
[864]	validation_0-mlogloss:0.21361
[865]	validation_0-mlogloss:0.21356
[866]	validation_0-mlogloss:0.21348
[867]	validation_0-mlogloss:0.21341
[868]	validation_0-mlogloss:0.21339
[869]	validation_0-mlogloss:0.21334
[870]	validation_0-mlogloss:0.21332
[871]	validation_0-mlogloss:0.21326
[872]	validation_0-mlogloss:0.21322
[873]	validation_0-mlogloss:0.21320
[874]	validation_0-mlogloss:0.21316
[875]	validation_0-mlogloss:0.21315
[876]	validation_0-mlogloss:0.21311
[877]	validation_0-mlogloss:0.21305
[878]	validation_0-mlogloss:0.21300
[879]	validation_0-mlogloss:0.21298
[880]	validation_0-mlogloss:0.21292
[881]	validation_0-mlogloss:0.21286
[882]	validation_0-mlogloss:0.21286
[883]	validation_0-mlogloss:0.21281
[884]	validation_0-mlogloss:0.21276
[885]	validation_0-mlogloss:0.21273
[886]	validation_0-mlogloss:0.21271
[887]	validation_0-mlogloss:0.21270
[888]	validation_0-mlogloss:0.21265
[889]	validation_0-mlogloss:0.21267
[890]	validation_0-mlogloss:0.21262
[891]	validation_0-mlogloss:0.21262
[892]	validation_0-mlogloss:0.21264
[893]	validation_0-mlogloss:0.21264
[894]	validation_0-mlogloss:0.21262
[895]	validation_0-mlogloss:0.21263
[896]	validation_0-mlogloss:0.21265
[897]	validation_0-mlogloss:0.21261
[898]	validation_0-mlogloss:0.21259
[899]	validation_0-mlogloss:0.21260
[900]	validation_0-mlogloss:0.21258
[901]	validation_0-mlogloss:0.21260
[902]	validation_0-mlogloss:0.21262
[903]	validation_0-mlogloss:0.21260
[904]	validation_0-mlogloss:0.21258
[905]	validation_0-mlogloss:0.21258
[906]	validation_0-mlogloss:0.21261
[907]	validation_0-mlogloss:0.21259
[908]	validation_0-mlogloss:0.21258
[909]	validation_0-mlogloss:0.21264
[910]	validation_0-mlogloss:0.21259
[911]	validation_0-mlogloss:0.21259
[912]	validation_0-mlogloss:0.21261
[913]	validation_0-mlogloss:0.21262
[914]	validation_0-mlogloss:0.21261
[915]	validation_0-mlogloss:0.21264
[916]	validation_0-mlogloss:0.21264
[917]	validation_0-mlogloss:0.21260
[918]	validation_0-mlogloss:0.21258
[919]	validation_0-mlogloss:0.21265
[920]	validation_0-mlogloss:0.21264
[921]	validation_0-mlogloss:0.21265
[922]	validation_0-mlogloss:0.21264
[923]	validation_0-mlogloss:0.21265
[924]	validation_0-mlogloss:0.21261
[925]	validation_0-mlogloss:0.21260
[926]	validation_0-mlogloss:0.21261
[927]	validation_0-mlogloss:0.21262
[928]	validation_0-mlogloss:0.21263
[929]	validation_0-mlogloss:0.21264
[930]	validation_0-mlogloss:0.21268
[931]	validation_0-mlogloss:0.21265
[932]	validation_0-mlogloss:0.21263
[933]	validation_0-mlogloss:0.21254
[934]	validation_0-mlogloss:0.21256
[935]	validation_0-mlogloss:0.21258
[936]	validation_0-mlogloss:0.21257
[937]	validation_0-mlogloss:0.21251
[938]	validation_0-mlogloss:0.21248
[939]	validation_0-mlogloss:0.21246
[940]	validation_0-mlogloss:0.21240
[941]	validation_0-mlogloss:0.21243
[942]	validation_0-mlogloss:0.21236
[943]	validation_0-mlogloss:0.21232
[944]	validation_0-mlogloss:0.21233
[945]	validation_0-mlogloss:0.21231
[946]	validation_0-mlogloss:0.21234
[947]	validation_0-mlogloss:0.21230
[948]	validation_0-mlogloss:0.21232
[949]	validation_0-mlogloss:0.21232
[950]	validation_0-mlogloss:0.21227
[951]	validation_0-mlogloss:0.21230
[952]	validation_0-mlogloss:0.21231
[953]	validation_0-mlogloss:0.21230
[954]	validation_0-mlogloss:0.21233
[955]	validation_0-mlogloss:0.21233
[956]	validation_0-mlogloss:0.21233
[957]	validation_0-mlogloss:0.21234
[958]	validation_0-mlogloss:0.21233
[959]	validation_0-mlogloss:0.21233
[960]	validation_0-mlogloss:0.21234
[961]	validation_0-mlogloss:0.21229
[962]	validation_0-mlogloss:0.21229
[963]	validation_0-mlogloss:0.21227
[964]	validation_0-mlogloss:0.21227
[965]	validation_0-mlogloss:0.21230
[966]	validation_0-mlogloss:0.21229
[967]	validation_0-mlogloss:0.21227
[968]	validation_0-mlogloss:0.21224
[969]	validation_0-mlogloss:0.21220
[970]	validation_0-mlogloss:0.21220
[971]	validation_0-mlogloss:0.21218
[972]	validation_0-mlogloss:0.21216
[973]	validation_0-mlogloss:0.21213
[974]	validation_0-mlogloss:0.21208
[975]	validation_0-mlogloss:0.21205
[976]	validation_0-mlogloss:0.21206
[977]	validation_0-mlogloss:0.21201
[978]	validation_0-mlogloss:0.21200
[979]	validation_0-mlogloss:0.21199
[980]	validation_0-mlogloss:0.21200
[981]	validation_0-mlogloss:0.21198
[982]	validation_0-mlogloss:0.21199
[983]	validation_0-mlogloss:0.21196
[984]	validation_0-mlogloss:0.21198
[985]	validation_0-mlogloss:0.21198
[986]	validation_0-mlogloss:0.21198
[987]	validation_0-mlogloss:0.21193
[988]	validation_0-mlogloss:0.21196
[989]	validation_0-mlogloss:0.21200
[990]	validation_0-mlogloss:0.21196
[991]	validation_0-mlogloss:0.21193
[992]	validation_0-mlogloss:0.21191
[993]	validation_0-mlogloss:0.21192
[994]	validation_0-mlogloss:0.21190
[995]	validation_0-mlogloss:0.21186
[996]	validation_0-mlogloss:0.21185
[997]	validation_0-mlogloss:0.21181
[998]	validation_0-mlogloss:0.21179
[999]	validation_0-mlogloss:0.21175
Out[ ]:
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=True, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=1000, n_jobs=None, num_class=4,
              num_parallel_tree=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=True, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=1000, n_jobs=None, num_class=4,
              num_parallel_tree=None, ...)

Random Forest Classification¶

In [ ]:
from sklearn.ensemble import RandomForestClassifier
In [ ]:
rd = RandomForestClassifier(criterion='gini',random_state=23,max_depth=24,max_samples=100)
In [ ]:
rd.fit(x_train,y_train)
Out[ ]:
RandomForestClassifier(max_depth=24, max_samples=100, random_state=23)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=24, max_samples=100, random_state=23)
In [ ]:
evaluate_model(rd)
Train Accuracy : 0.87375
Train Confusion Matrix:
[[391  22   0   0]
 [ 35 328  37   0]
 [  0  57 307  20]
 [  0   0  31 372]]
--------------------------------------------------
Test Accuracy : 0.8175
Test Confusion Matrix:
[[84  3  0  0]
 [14 75 11  0]
 [ 0 22 81 13]
 [ 0  0 10 87]]

Hyperparameter tuning¶

In [ ]:
params = {
    'max_depth': [1,2, 3, 5, 10,15, 20],
    'max_samples': [5,10,20,30,40,50,60,70,80,90,100,500,1000],
    'criterion': ["gini", "entropy"]
}
grid_search = GridSearchCV(estimator=rd, 
                           param_grid=params, 
                           cv=4, n_jobs=-1, verbose=1, scoring = "accuracy")
In [ ]:
grid_search.fit(x_train, y_train)
Fitting 4 folds for each of 182 candidates, totalling 728 fits
Out[ ]:
GridSearchCV(cv=4,
             estimator=RandomForestClassifier(max_depth=24, max_samples=100,
                                              random_state=23),
             n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 5, 10, 15, 20],
                         'max_samples': [5, 10, 20, 30, 40, 50, 60, 70, 80, 90,
                                         100, 500, 1000]},
             scoring='accuracy', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=4,
             estimator=RandomForestClassifier(max_depth=24, max_samples=100,
                                              random_state=23),
             n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 5, 10, 15, 20],
                         'max_samples': [5, 10, 20, 30, 40, 50, 60, 70, 80, 90,
                                         100, 500, 1000]},
             scoring='accuracy', verbose=1)
RandomForestClassifier(max_depth=24, max_samples=100, random_state=23)
RandomForestClassifier(max_depth=24, max_samples=100, random_state=23)
In [ ]:
grid_search.best_estimator_
Out[ ]:
RandomForestClassifier(criterion='entropy', max_depth=20, max_samples=1000,
                       random_state=23)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(criterion='entropy', max_depth=20, max_samples=1000,
                       random_state=23)
In [ ]:
rd_best = grid_search.best_estimator_
In [ ]:
rd_best = grid_search.best_estimator_
evaluate_model(rd_best)
Train Accuracy : 1.0
Train Confusion Matrix:
[[413   0   0   0]
 [  0 400   0   0]
 [  0   0 384   0]
 [  0   0   0 403]]
--------------------------------------------------
Test Accuracy : 0.875
Test Confusion Matrix:
[[85  2  0  0]
 [ 9 84  7  0]
 [ 0 14 93  9]
 [ 0  0  9 88]]

Model Evaluation¶

Decision Tree Classification¶

In [ ]:
from sklearn.metrics import classification_report
In [ ]:
print(classification_report(y_test, dt_best.predict(x_test)))
              precision    recall  f1-score   support

           0       0.90      0.95      0.93        87
           1       0.88      0.86      0.87       100
           2       0.88      0.83      0.85       116
           3       0.88      0.92      0.90        97

    accuracy                           0.89       400
   macro avg       0.89      0.89      0.89       400
weighted avg       0.88      0.89      0.88       400

In [ ]:
cm1 = confusion_matrix(y_test,dt_best.predict(x_test))
plt.figure(figsize=(10,7))
sns.heatmap(cm1,annot=True)
plt.xlabel('Predicted')
plt.ylabel('True Value')
Out[ ]:
Text(95.72222222222221, 0.5, 'True Value')
No description has been provided for this image

XGBoost¶

In [ ]:
print(model.score(x_train, y_train))
1.0
In [ ]:
print(model.score(x_test, y_test))
0.92
In [ ]:
cm1 = confusion_matrix(y_test,model.predict(x_test))
plt.figure(figsize=(10,7))
sns.heatmap(cm1,annot=True)
plt.xlabel('Predicted')
plt.ylabel('True Value')
Out[ ]:
Text(95.72222222222221, 0.5, 'True Value')
No description has been provided for this image

Random Forest Classification¶

In [ ]:
print(classification_report(y_test, rd_best.predict(x_test)))
              precision    recall  f1-score   support

           0       0.90      0.98      0.94        87
           1       0.84      0.84      0.84       100
           2       0.85      0.80      0.83       116
           3       0.91      0.91      0.91        97

    accuracy                           0.88       400
   macro avg       0.88      0.88      0.88       400
weighted avg       0.87      0.88      0.87       400

In [ ]:
cm1 = confusion_matrix(y_test,rd_best.predict(x_test))
plt.figure(figsize=(10,7))
sns.heatmap(cm1,annot=True)
plt.xlabel('Predicted')
plt.ylabel('True Value')
Out[ ]:
Text(95.72222222222221, 0.5, 'True Value')
No description has been provided for this image
In [ ]: